import altair as alt
import pandas as pd

# Switch Altair to the VegaFusion data transformer before any chart is built.
alt.data_transformers.enable("vegafusion")

# Load the survey CSV once; the chart cells that follow all read from `df`.
url = "https://calvin-data304.netlify.app/data/wvs.csv"
df = pd.read_csv(url)

# Quick look at the first rows to confirm the download and column names.
print(df.head())
# Count respondents per country and give the two resulting columns
# explicit names (assigned positionally so the result does not depend on
# what value_counts()/reset_index() call the count column).
country_counts = df['country'].value_counts().reset_index()
country_counts.columns = ['country', 'Number of Respondents']
sorted_countries = country_counts.sort_values(by='Number of Respondents', ascending=False)

# Bar chart of respondents per country, bars ordered from most to fewest.
chart = alt.Chart(sorted_countries).mark_bar().encode(
    x=alt.X(
        'country:N',
        sort=alt.EncodingSortField(
            field='Number of Respondents', op='sum', order='descending'
        ),
    ),
    y='Number of Respondents:Q',
    color=alt.value('steelblue'),
).properties(
    width=800,
    title='Number of Respondents in Each Country',
)

# configure_*() returns a new chart instead of modifying `chart` in place,
# so the result has to be bound back to `chart` for the styling to show up.
# Title options go in a single configure_title() call because a second
# call replaces the title config set by the first.
chart = chart.configure_axis(
    labelFontSize=12,
    titleFontSize=14,
).configure_title(
    fontSize=16,
    anchor='start',
).configure_legend(
    titleFontSize=14,
    labelFontSize=12,
).configure_header(
    titleFontSize=16,
    labelFontSize=14,
)

chart
The number of respondents differs notably from country to country.
Code
print("Unique values in age3:", df['age3'].unique())

# age3 holds integer category codes (see the printed unique values), so it
# is typed as nominal explicitly; left untyped, Altair infers a
# quantitative axis from the integer column. This matches the ':N' typing
# used for the age-group axes in the later cells.
chart1 = alt.Chart(df).mark_bar().encode(
    x='age3:N',
    y='count()',
    color='country:N',
).properties(
    title="Age Groups (3 categories) by Country",
)

chart1
Unique values in age3: [2 3 1]
Code
print("Unique values in age6:", df['age6'].unique())

# age6 holds integer category codes (see the printed unique values), so it
# is typed as nominal explicitly; left untyped, Altair infers a
# quantitative axis from the integer column. This matches the ':N' typing
# used for age6 in the later cells.
chart2 = alt.Chart(df).mark_bar().encode(
    x='age6:N',
    y='count()',
    color='country:N',
).properties(
    title="Age Groups (6 categories) by Country",
)

chart2
Unique values in age6: [3 5 4 2 6 1]
Code
print("Unique values in age6:", df['age6'].unique())

# One line (with point markers) per country: number of rows in each age6
# group, coloured by country.
counts_by_age_group = alt.Chart(df).mark_line(point=True).encode(
    x=alt.X('age6:N'),
    y=alt.Y('count()'),
    color=alt.Color('country:N'),
).properties(
    title="Age Groups (6 categories) by Country",
)

# Split into one panel per country.
chart2 = counts_by_age_group.facet(facet='country:N')

chart2
Unique values in age6: [3 5 4 2 6 1]
Code
print("Unique values in age6:", df['age6'].unique())

# y channel: mean of democracy_importance within each age6 group.
mean_importance = alt.Y(
    'average(democracy_importance):Q',
    title='Average democracy importance',
)

# One line per country across the six age groups.
importance_by_age_group = alt.Chart(df).mark_line().encode(
    x='age6:N',
    y=mean_importance,
    color='country:N',
).properties(
    title="Average Age Groups (6 categories) by Country",
)

# Split into one panel per country.
chart2 = importance_by_age_group.facet(facet='country:N')

chart2
Unique values in age6: [3 5 4 2 6 1]
Code
print("Unique values in age:", df['age'].unique())

# Mean democracy_importance at each age, one line per country, one panel
# per country. age is encoded as quantitative (':Q') so the x axis is a
# continuous scale; ':N' would create one discrete axis category per
# distinct age. The regression cell also treats age as quantitative.
chart2 = alt.Chart(df).mark_line().encode(
    x='age:Q',
    y=alt.Y('average(democracy_importance):Q', title='Average democracy importance'),
    color='country:N',
).properties(
    title="Average Age Groups by Country",
).facet(
    facet='country:N',
)

chart2
# Linear regression: scatter of weight against age with a fitted straight
# line layered on top (transform_regression defaults to a linear fit).
linear_reg = alt.Chart(df).mark_point().encode(
    x='age',
    y='weight',
).properties(
    title='Scatter Plot with Linear Regression',
)
linear_reg += linear_reg.transform_regression('age', 'weight').mark_line()

# Polynomial regression: same scatter with a cubic fit layered on top.
# method='poly' is required: `order` is only used by the polynomial
# method, and without it the transform falls back to a linear fit.
poly_reg = alt.Chart(df).mark_point().encode(
    x='age',
    y='weight',
).properties(
    title='Scatter Plot with Polynomial Regression',
)
poly_reg += poly_reg.transform_regression(
    'age', 'weight', method='poly', order=3
).mark_line()

# Show the two fits side by side.
linear_reg | poly_reg
Source Code
---
title: "HW7"
author: "Danyili Hong"
format:
  html:
    embed-resources: true
    code-tools: true
    code-fold: true
---

```{python}
import pandas as pd
import altair as alt

alt.data_transformers.enable("vegafusion")

url = "https://calvin-data304.netlify.app/data/wvs.csv"
df = pd.read_csv(url)
print(df.head())
```

2.

```{python}
country_counts = df['country'].value_counts().reset_index()
country_counts.columns = ['country', 'Number of Respondents']
sorted_countries = country_counts.sort_values(by='Number of Respondents', ascending=False)

chart = alt.Chart(sorted_countries).mark_bar().encode(
    x=alt.X(
        'country:N',
        sort=alt.EncodingSortField(
            field='Number of Respondents', op='sum', order='descending'
        ),
    ),
    y='Number of Respondents:Q',
    color=alt.value('steelblue'),
).properties(
    width=800,
    title='Number of Respondents in Each Country',
)

# configure_*() returns a new chart, so the result must be assigned back;
# title options share one configure_title() call because a second call
# replaces the title config set by the first.
chart = chart.configure_axis(
    labelFontSize=12,
    titleFontSize=14,
).configure_title(
    fontSize=16,
    anchor='start',
).configure_legend(
    titleFontSize=14,
    labelFontSize=12,
).configure_header(
    titleFontSize=16,
    labelFontSize=14,
)

chart
```

There is a notable difference among the countries.

3.

```{python}
print("Unique values in age3:", df['age3'].unique())

# age3 is a category code, so type it as nominal rather than letting
# Altair infer a quantitative axis from the integer column.
chart1 = alt.Chart(df).mark_bar().encode(
    x='age3:N',
    y='count()',
    color='country:N',
).properties(
    title="Age Groups (3 categories) by Country",
)

chart1
```

```{python}
print("Unique values in age6:", df['age6'].unique())

# age6 is a category code, so type it as nominal rather than letting
# Altair infer a quantitative axis from the integer column.
chart2 = alt.Chart(df).mark_bar().encode(
    x='age6:N',
    y='count()',
    color='country:N',
).properties(
    title="Age Groups (6 categories) by Country",
)

chart2
```

4.

```{python}
print("Unique values in age6:", df['age6'].unique())

chart2 = alt.Chart(df).mark_line(point=True).encode(
    x='age6:N',
    y='count()',
    color='country:N',
).properties(
    title="Age Groups (6 categories) by Country",
).facet(
    facet='country:N',
)

chart2
```

5.

```{python}
print("Unique values in age6:", df['age6'].unique())

chart2 = alt.Chart(df).mark_line().encode(
    x='age6:N',
    y=alt.Y('average(democracy_importance):Q', title='Average democracy importance'),
    color='country:N',
).properties(
    title="Average Age Groups (6 categories) by Country",
).facet(
    facet='country:N',
)

chart2
```

6.

```{python}
print("Unique values in age:", df['age'].unique())

# age is continuous, so encode it as quantitative; ':N' would create one
# discrete axis category per distinct age.
chart2 = alt.Chart(df).mark_line().encode(
    x='age:Q',
    y=alt.Y('average(democracy_importance):Q', title='Average democracy importance'),
    color='country:N',
).properties(
    title="Average Age Groups by Country",
).facet(
    facet='country:N',
)

chart2
```

It's worse: the chart is hard to read at a glance.

8.

```{python}
# Linear Regression
linear_reg = alt.Chart(df).mark_point().encode(
    x='age',
    y='weight',
).properties(
    title='Scatter Plot with Linear Regression',
)
linear_reg += linear_reg.transform_regression('age', 'weight').mark_line()

# Polynomial Regression
# method='poly' is required; `order` is only used by the polynomial method.
poly_reg = alt.Chart(df).mark_point().encode(
    x='age',
    y='weight',
).properties(
    title='Scatter Plot with Polynomial Regression',
)
poly_reg += poly_reg.transform_regression(
    'age', 'weight', method='poly', order=3
).mark_line()

linear_reg | poly_reg
```